import json
import os

# Input and output paths.
input_file = "/mnt/vepfs/audio/share/wangjunyou/JiuZhang3.0/dataset/MMLU-Pro/test.jsonl"  # locally downloaded copy of test.jsonl
output_file = "/mnt/vepfs/audio/share/wangjunyou/JiuZhang3.0/eval/data/MMLU-Pro/test.jsonl"

# Create the destination directory first so that opening the output file
# does not fail because its parent directory is missing.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Stream the input one line at a time and copy only the "math" records to
# the output file, one JSON object per line.
with open(input_file, "r", encoding="utf-8") as fin, open(output_file, "w", encoding="utf-8") as fout:
    for line in fin:
        # A blank or whitespace-only line (for example a trailing empty line
        # at the end of the file) is not valid JSON and would make
        # json.loads raise, so skip it instead of aborting the whole run.
        if not line.strip():
            continue
        data = json.loads(line)
        # Keep only rows whose category is "math".
        if data.get("category") != "math":
            continue
        # Remove the "idx" field when present; a missing key is not an error.
        data.pop("idx", None)
        # ensure_ascii=False writes non-ASCII characters as-is rather than
        # as \uXXXX escapes.
        fout.write(json.dumps(data, ensure_ascii=False) + "\n")

# Report where the filtered file was written.
print(f"已保存到 {output_file}")
